Introduction

This markdown provides comparison between two meQTL analysis after LD clumping. meQTL analysis were done using the R package MatrixEQTL. LD clumping was done using PLINK

For all analysis, the same DNAm data were used which underwent following QC steps:

DNAm QC

The genotype data were different for each analysis:

  1. The SNP data were used where the uncertain genotype was replaced with the most likely genotype

  2. SNP probabilities, i.e. dosages, were used

For all three analysis, the imputed QC SNP data (no LD pruning, the MHC regions are included) were used. The data includes 3’957’338 SNPs.

All meQTLs passed the significance threshold of FDR = 0.05.

LD clumping was done per CpG using the following parameters:

–clump-p1 = 0.05: Significance threshold for index SNPs

–clump-p2 = 1: Secondary significance threshold for clumped SNPs

–clump-r2 = 0.2: LD threshold for clumping

–clump-kb = 200: Physical distance threshold for clumping

meQTL analysis: using SNPs’ probabilities

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

Scatter plot

fdr.thr <- 0.05
plot.title <- paste0("Significant at FDR <= ", fdr.thr, " meQTLs.", "\nResult of MatrixEQTL")
GetScatterPlot2(meqtl.all.prob.df, plot.title = plot.title, cbPalette = cbPalette)

Upset plots

meQTL level

meqtls.meqtls <- list(delta = meqtl.all.prob.df[treatment == "delta", meQTL_ID], veh = meqtl.all.prob.df[treatment ==
    "baseline", meQTL_ID])

intersect.meqtls <- intersect(meqtls.meqtls$delta, meqtls.meqtls$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect.meqtls)/length(meqtls.meqtls$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect.meqtls)/length(meqtls.meqtls$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTLs"

ggVennDiagram::ggVennDiagram(meqtls.meqtls, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

SNP level

meqtls.snps <- list(delta = meqtl.all.prob.df[treatment == "delta", SNP], veh = meqtl.all.prob.df[treatment ==
    "baseline", SNP])

intersect <- intersect(meqtls.snps$delta, meqtls.snps$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect)/length(meqtls.snps$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect)/length(meqtls.snps$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTL SNPs"

ggVennDiagram::ggVennDiagram(meqtls.snps, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

CpG level

meqtls.cpg <- list(delta = meqtl.all.prob.df[treatment == "delta", CpG_ID], veh = meqtl.all.prob.df[treatment ==
    "baseline", CpG_ID])

intersect <- intersect(meqtls.cpg$delta, meqtls.cpg$veh)
perc.olap.delta.with.veh <- scales::percent(length(intersect)/length(meqtls.cpg$delta), accuracy = 0.1)
perc.olap.veh.with.delta <- scales::percent(length(intersect)/length(meqtls.cpg$veh), accuracy = 0.1)

plot.title <- "Number of intersections between delta and baseline meQTL CpGs"

ggVennDiagram::ggVennDiagram(meqtls.cpg, category.names = c(paste0("GR-response, ", perc.olap.delta.with.veh),
    paste0("Baseline, ", perc.olap.veh.with.delta)), label_alpha = 0.7, edge_size = 0, set_geom = "text",
    set_color = "black", label = "count") + theme(legend.position = "none", legend.title = element_blank(),
    panel.background = element_blank(), plot.title = element_text(size = 10, face = "italic")) + labs(x = "",
    y = "", title = plot.title) + scale_color_manual(values = cbPalette) + scale_fill_gradient(low = alpha(cbPalette[2],
    0.5), high = alpha(cbPalette[1], 0.5))

meQTLs analysis: using the most likely genotype (mode)

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

Overlap between meQTLs from different approaches

Probabilities vs most likely genotype

meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", meQTL_ID], mode = meqtl.all.mode.df[treatment ==
    "delta", meQTL_ID])

meqtls.veh <- list(prob = meqtl.all.prob.df[treatment == "baseline", meQTL_ID], mode = meqtl.all.mode.df[treatment ==
    "baseline", meQTL_ID])

delta GR-induced meQTLs

euler.tbl <- euler(meqtls.delta)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

baseline meQTLs

euler.tbl <- euler(meqtls.veh)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

mecpgs.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", CpG_ID] %>%
    unique(), mode = meqtl.all.mode.df[treatment == "delta", CpG_ID] %>%
    unique())

mecpgs.veh <- list(prob = meqtl.all.prob.df[treatment == "baseline", CpG_ID] %>%
    unique(), mode = meqtl.all.mode.df[treatment == "baseline", CpG_ID] %>%
    unique())

delta GR-induced meCpGs

euler.tbl <- euler(mecpgs.delta)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

baseline meCpGs

euler.tbl <- euler(mecpgs.veh)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

mesnps.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", SNP] %>%
    unique(), mode = meqtl.all.mode.df[treatment == "delta", SNP] %>%
    unique())

mesnps.veh <- list(prob = meqtl.all.prob.df[treatment == "baseline", SNP] %>%
    unique(), mode = meqtl.all.mode.df[treatment == "baseline", SNP] %>%
    unique())

delta GR-induced meSNPs

euler.tbl <- euler(mesnps.delta)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

baseline meSNPs

euler.tbl <- euler(mesnps.veh)

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = c("lightgrey", "brown"), labels = c("probabilities",
    " most likely genotype"))

Case visualisation of delta GR-induced meQTL results

meqtls.delta <- list(prob = meqtl.all.prob.df[treatment == "delta", meQTL_ID], 
                     miss = NULL, # meqtl.all.na.df[treatment == "delta", meQTL_ID], 
                     mode = meqtl.all.mode.df[treatment == "delta", meQTL_ID])

The example of meQTL which is the most significant only for “the most likely genotype” case

unique.mode.meqtl <- setdiff(meqtls.delta$mode, union(meqtls.delta$prob, meqtls.delta$miss))
unique.mode.meqtl.df <- meqtl.all.mode.df[treatment == "delta"][meQTL_ID %in% unique.mode.meqtl]
selected.meqtl <- unique.mode.meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs10249476 cg00011113 0.0208 5.75 3.48e-08 8.72e-08 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The example of meQTL which is the most significant only for the “dosage” case

unique.meqtl <- setdiff(meqtls.delta$prob, union(meqtls.delta$mode, meqtls.delta$miss))
unique.meqtl.df <- meqtl.all.prob.df[treatment == "delta"][meQTL_ID %in% unique.meqtl]
selected.meqtl <- unique.meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs35879900 cg26175789 -0.031 -5.44 1.62e-07 0.00875 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The example of meQTL which is significant for both cases

meqtl.lst <- intersect(meqtls.delta$mode, meqtls.delta$prob)

The most signififcant from the analysis where uncertain genotypes were replaced with the most likely one

meqtl.df <- meqtl.all.mode.df[treatment == "delta"][meQTL_ID %in% meqtl.lst]
selected.meqtl <- meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs114232786 cg00020172 0.0452 6.41 1.06e-09 4.03e-09 delta
selected.meqtl <- meqtl.df[1]

ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)

The most signififcant from the analysis where dosages were used

meqtl.df <- meqtl.all.prob.df[treatment == "delta"][meQTL_ID %in% meqtl.lst]
selected.meqtl <- meqtl.df[1]

kable(selected.meqtl %>%
    select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs11107749 cg08922308 0.0461 8 1.1e-13 4.93e-08 delta
selected.meqtl <- meqtl.df[1]

ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl, fdr.thr = 0.05)